library(mosaic)
library(tidyverse)
library(lubridate)
library(DataComputing)
library(rvest)
library(broom)

Research Focus:

As COVID-19 spreads at an alarming rate, a pressing question emerges at a global scale: what characteristics of a country contribute to the spread of the coronavirus? The factors we will analyze are population density and proximity to the point of origin (China).

Data Access

Reading in the Data:

Data Source 1: COVID

# Load the raw COVID-19 export exactly as it is stored on disk.
COVID <- read.csv("total-covid-cases-deaths-per-million.csv")
COVID
# Number of rows in the raw table.
nrow(COVID)
[1] 9487
# Column names of the raw table.
names(COVID)
  [1] "total.covid.cases.deaths.per.million" "X"                                    "X.1"                                 
  [4] "X.2"                                  "X.3"                                  "X.4"                                 
  [7] "X.5"                                  "X.6"                                  "X.7"                                 
 [10] "X.8"                                  "X.9"                                  "X.10"                                
 [13] "X.11"                                 "X.12"                                 "X.13"                                
 [16] "X.14"                                 "X.15"                                 "X.16"                                
 [19] "X.17"                                 "X.18"                                 "X.19"                                
 [22] "X.20"                                 "X.21"                                 "X.22"                                
 [25] "X.23"                                 "X.24"                                 "X.25"                                
 [28] "X.26"                                 "X.27"                                 "X.28"                                
 [31] "X.29"                                 "X.30"                                 "X.31"                                
 [34] "X.32"                                 "X.33"                                 "X.34"                                
 [37] "X.35"                                 "X.36"                                 "X.37"                                
 [40] "X.38"                                 "X.39"                                 "X.40"                                
 [43] "X.41"                                 "X.42"                                 "X.43"                                
 [46] "X.44"                                 "X.45"                                 "X.46"                                
 [49] "X.47"                                 "X.48"                                 "X.49"                                
 [52] "X.50"                                 "X.51"                                 "X.52"                                
 [55] "X.53"                                 "X.54"                                 "X.55"                                
 [58] "X.56"                                 "X.57"                                 "X.58"                                
 [61] "X.59"                                 "X.60"                                 "X.61"                                
 [64] "X.62"                                 "X.63"                                 "X.64"                                
 [67] "X.65"                                 "X.66"                                 "X.67"                                
 [70] "X.68"                                 "X.69"                                 "X.70"                                
 [73] "X.71"                                 "X.72"                                 "X.73"                                
 [76] "X.74"                                 "X.75"                                 "X.76"                                
 [79] "X.77"                                 "X.78"                                 "X.79"                                
 [82] "X.80"                                 "X.81"                                 "X.82"                                
 [85] "X.83"                                 "X.84"                                 "X.85"                                
 [88] "X.86"                                 "X.87"                                 "X.88"                                
 [91] "X.89"                                 "X.90"                                 "X.91"                                
 [94] "X.92"                                 "X.93"                                 "X.94"                                
 [97] "X.95"                                 "X.96"                                 "X.97"                                
[100] "X.98"                                 "X.99"                                 "X.100"                               
[103] "X.101"                                "X.102"                                "X.103"                               
[106] "X.104"                                "X.105"                                "X.106"                               
[109] "X.107"                                "X.108"                                "X.109"                               
[112] "X.110"                                "X.111"                                "X.112"                               
[115] "X.113"                                "X.114"                                "X.115"                               
[118] "X.116"                                "X.117"                                "X.118"                               
[121] "X.119"                                "X.120"                                "X.121"                               
[124] "X.122"                                "X.123"                                "X.124"                               
[127] "X.125"                                "X.126"                                "X.127"                               
[130] "X.128"                                "X.129"                                "X.130"                               
[133] "X.131"                                "X.132"                                "X.133"                               
[136] "X.134"                                "X.135"                                "X.136"                               
[139] "X.137"                                "X.138"                                "X.139"                               
[142] "X.140"                                "X.141"                                "X.142"                               
[145] "X.143"                                "X.144"                                "X.145"                               
[148] "X.146"                                "X.147"                                "X.148"                               
[151] "X.149"                                "X.150"                                "X.151"                               
[154] "X.152"                                "X.153"                                "X.154"                               
[157] "X.155"                                "X.156"                                "X.157"                               
[160] "X.158"                                "X.159"                                "X.160"                               
[163] "X.161"                                "X.162"                                "X.163"                               
[166] "X.164"                                "X.165"                                "X.166"                               
[169] "X.167"                                "X.168"                                "X.169"                               
[172] "X.170"                                "X.171"                                "X.172"                               
[175] "X.173"                                "X.174"                                "X.175"                               
[178] "X.176"                                "X.177"                                "X.178"                               
[181] "X.179"                                "X.180"                                "X.181"                               
[184] "X.182"                                "X.183"                                "X.184"                               
[187] "X.185"                                "X.186"                                "X.187"                               
[190] "X.188"                                "X.189"                                "X.190"                               
[193] "X.191"                                "X.192"                                "X.193"                               
[196] "X.194"                                "X.195"                                "X.196"                               
[199] "X.197"                                "X.198"                                "X.199"                               
[202] "X.200"                                "X.201"                                "X.202"                               
[205] "X.203"                                "X.204"                                "X.205"                               
[208] "X.206"                                "X.207"                                "X.208"                               
[211] "X.209"                                "X.210"                                "X.211"                               
[214] "X.212"                                "X.213"                                "X.214"                               
[217] "X.215"                                "X.216"                                "X.217"                               
[220] "X.218"                                "X.219"                                "X.220"                               
[223] "X.221"                                "X.222"                                "X.223"                               
[226] "X.224"                                "X.225"                                "X.226"                               
[229] "X.227"                                "X.228"                                "X.229"                               
[232] "X.230"                                "X.231"                                "X.232"                               
[235] "X.233"                                "X.234"                                "X.235"                               
[238] "X.236"                                "X.237"                                "X.238"                               
[241] "X.239"                                "X.240"                                "X.241"                               
[244] "X.242"                                "X.243"                                "X.244"                               
[247] "X.245"                                "X.246"                                "X.247"                               
[250] "X.248"                                "X.249"                                "X.250"                               
[253] "X.251"                                "X.252"                                "X.253"                               
[256] "X.254"                               
# First few rows of the raw table.
head(COVID)

Data Source 2: CountryData

# CountryData is not created in this file; presumably it is the dataset
# shipped with the DataComputing package loaded above -- TODO confirm.
CountryData
# Number of rows in the country table.
nrow(CountryData)
[1] 256
# Column names available in the country table.
names(CountryData)
 [1] "country"           "area"              "pop"               "growth"            "birth"             "death"            
 [7] "migr"              "maternal"          "infant"            "life"              "fert"              "health"           
[13] "HIVrate"           "HIVpeople"         "HIVdeath"          "obesity"           "underweight"       "educ"             
[19] "unemploymentYouth" "GDP"               "GDPgrowth"         "GDPcapita"         "saving"            "indProd"          
[25] "labor"             "unemployment"      "family"            "tax"               "budget"            "debt"             
[31] "inflation"         "discount"          "lending"           "narrow"            "broad"             "credit"           
[37] "shares"            "balance"           "exports"           "imports"           "gold"              "externalDebt"     
[43] "homeStock"         "abroadStock"       "elecProd"          "elecCons"          "elecExp"           "elecImp"          
[49] "elecCap"           "elecFossil"        "elecNuc"           "elecHydro"         "elecRenew"         "oilProd"          
[55] "oilExp"            "oilImp"            "oilRes"            "petroProd"         "petroCons"         "petroExp"         
[61] "petroImp"          "gasProd"           "gasCons"           "gasExp"            "gasImp"            "gasRes"           
[67] "mainlines"         "cell"              "netHosts"          "netUsers"          "airports"          "railways"         
[73] "roadways"          "waterways"         "marine"            "military"         
# First few rows of the country table.
head(CountryData)

Data Wrangling of COVID Dataset

# Show the raw COVID table again as the starting point for wrangling.
COVID

Since our analysis is focused on the spread of COVID-19, we select only columns which pertain to the number of COVID-19 cases in countries over time.

# Build a tidy table with one row per country and date, holding the reported
# cases per million.
#
# The CSV was read with its title line as the header, so the real column
# names sit in the first data row and the columns arrive as
# `total.covid.cases.deaths.per.million`, `X`, `X.1`, ... . The columns are
# renamed, the embedded header row is dropped, and the three columns needed
# for the analysis are kept.
TidyCOVID <- COVID %>%
  rename(
    country = total.covid.cases.deaths.per.million,
    Code = X,
    date = X.1,
    casesPerMillion = X.3
  ) %>%
  # Row 1 holds the real column headers, not data.
  filter(row_number() > 1) %>%
  subset(select = c(1, 3, 5)) %>%
  mutate(
    country = as.character(country),
    date = mdy(as.character(date)),
    # Convert via as.character(): calling as.integer() directly on a factor
    # returns the internal level codes rather than the printed numbers, and
    # on character input it would truncate the decimals.
    casesPerMillion = as.numeric(as.character(casesPerMillion)),
    # Blank entries become NA on conversion. They are set to 0 so every
    # country/date row is kept and later sums stay defined.
    # NOTE(review): confirm that a blank means "no reported cases".
    casesPerMillion = ifelse(is.na(casesPerMillion), 0, casesPerMillion)
  )
TidyCOVID

WHAT DOES AN INSTANCE REPRESENT NOW??

Select the CountryData columns relevant to our analysis: area (sq km) and pop (number of people). Then calculate a new variable, popdensity: the number of people per sq km.

# Keep the country name, area and population (the first three columns of
# CountryData), then derive population density rounded to two decimals.
RelevantCountryData <- mutate(
  dplyr::select(CountryData, country, area, pop),
  popdensity = round(pop / area, digits = 2)
)
RelevantCountryData
# Attach the country attributes to every COVID row. The inner join keeps
# only countries whose name appears in both tables.
COVIDGrowth <- TidyCOVID %>%
  inner_join(RelevantCountryData, by = "country")
COVIDGrowth

Calculate the number of cases in each country by multiplying casesPerMillion by the population (in millions). This gives an absolute case count for each country; note that, unlike casesPerMillion, it is not adjusted for population size.

# Convert the per-million rate into an absolute case count per country/date.
#
# The population is deliberately NOT rounded to whole millions first: doing
# so turns every country with fewer than 500,000 people into a population of
# 0 (and therefore 0 cases on every date) and distorts all others by up to
# half a million people.
COVIDGrowth <-
  COVIDGrowth %>%
  mutate(cases = casesPerMillion * pop / 1000000) %>%
  subset(select = c("country", "date", "cases", "pop", "popdensity"))
COVIDGrowth

This table records the first date that a country recorded a nonzero number of COVID-19 cases.

# For each country, find the earliest date whose case count is nonzero.
# Rows with zero cases are discarded first, so a country that never has a
# nonzero count does not appear in the result.
FirstInstance <- summarise(
  group_by(
    filter(COVIDGrowth, cases != 0),
    country
  ),
  beginningofspread = min(date)
)

FirstInstance

This table gives the average increase in cases per day for each country, from the first day the country recorded COVID-19 cases to the most recent date in the data table (April 5, 2020).

# Average daily increase in cases per country: the case count on the
# snapshot date (2020-04-05) divided by the days since the first nonzero count.
DailySpread <-
  left_join(COVIDGrowth, FirstInstance, by = "country") %>%
  filter(date == as.Date("2020-04-05")) %>%
  # Elapsed time as a plain number of days (NA when the country has no
  # first-case date).
  mutate(dayselapsed = as.numeric(date - beginningofspread, units = "days")) %>%
  # A country whose first case falls on the snapshot date has 0 elapsed
  # days; count that as one day so the rate is finite instead of Inf.
  mutate(dailyspread = cases / pmax(dayselapsed, 1)) %>%
  subset(select = c("country", "beginningofspread", "dailyspread"))
# Countries with no first-case date get an NA rate above; record it as 0.
DailySpread$dailyspread[is.na(DailySpread$dailyspread)] <- 0
DailySpread
# Add each country's first-case date and average daily spread to every
# dated row of COVIDGrowth.
COVIDFinal <- COVIDGrowth %>%
  left_join(DailySpread, by = "country")
COVIDFinal
# Sum the cases over all countries in COVIDFinal for each date and draw one
# point per date.
ggplot(
  summarise(group_by(COVIDFinal, date), totalcases = sum(cases)),
  aes(x = date, y = totalcases)
) +
  geom_point()

  
# Bar chart of the 20 countries with the highest average daily spread,
# ordered from highest to lowest.
COVIDFinal %>%
  group_by(country) %>%
  # One value per country, taken as the mean of that country's rows.
  summarise(dailyspread = mean(dailyspread)) %>%
  arrange(desc(dailyspread)) %>%
  head(20) %>%
  ggplot(aes(x = reorder(country, desc(dailyspread)), y = dailyspread)) +
  geom_col(position = "stack", width = 0.9) +
  labs(x = "Country", y = "Spread of COVID-19 cases Per Day") +
  # Print plain numbers on the y axis rather than scientific notation.
  scale_y_continuous(labels = function(x) format(x, scientific = FALSE)) +
  # Tilt the country labels so they do not overlap.
  theme(axis.text.x = element_text(angle = 60, hjust = 1))

# Scatter plot of population against average daily spread, one point per
# row of COVIDFinal.
ggplot(COVIDFinal, aes(x = pop, y = dailyspread)) +
  geom_point()

LS0tCnRpdGxlOiAiRmluYWwgUHJvamVjdCIKb3V0cHV0OiBodG1sX25vdGVib29rCmF1dGhvcnM6ICJFdmVseW4gTXVycmF5IGFuZCBKb3NlcGggUGV2bmVyIgotLS0KCmBgYHtyfQpsaWJyYXJ5KG1vc2FpYykKbGlicmFyeSh0aWR5dmVyc2UpCmxpYnJhcnkobHVicmlkYXRlKQpsaWJyYXJ5KERhdGFDb21wdXRpbmcpCmxpYnJhcnkocnZlc3QpCmxpYnJhcnkoYnJvb20pCmBgYAoKIyMgUmVzZWFyY2ggRm9jdXM6CgpBcyBDT1ZJRC0xOSBzcHJlYWRzIGF0IGFuIGFsYXJtaW5nIHJhdGUsIGEgcHJlc3NpbmcgcXVlc3Rpb24gYXQgYSBnbG9iYWwgc2NhbGUgZW1lcmdlcy0tIHdoYXQgZmFjdG9ycyBvZiBhIGNvdW50cnkgY29udHJpYnV0ZSB0byB0aGUgc3ByZWFkIG9mIENvcm9uYXZpcnVzLiBUaGUgZmFjdG9ycyB3aGljaCB3ZSB3aWxsIGFuYWx5emUgYXJlOiBwb3B1bGF0aW9uIGRlbnNpdHksIGFuZCBwcm94aW1pdHkgdG8gb3JpZ2luIHBvaW50IChDaGluYSkuCgoKIyMgRGF0YSBBY2Nlc3MKCiMgUmVhZGluZyBpbiB0aGUgRGF0YToKCgpEYXRhIFNvdXJjZSAxOiBDT1ZJRApgYGB7cn0KQ09WSUQgPC0gcmVhZC5jc3YoZmlsZSA9ICJ0b3RhbC1jb3ZpZC1jYXNlcy1kZWF0aHMtcGVyLW1pbGxpb24uY3N2IikKQ09WSUQKYGBgCgpgYGB7cn0KQ09WSUQgJT4lCiAgbnJvdygpCmBgYApgYGB7cn0KQ09WSUQgJT4lCiAgbmFtZXMoKQpgYGAKYGBge3J9CkNPVklEICU+JQogIGhlYWQoKQpgYGAKCgoKRGF0YSBTb3VyY2UgMjogQ291bnRyeURhdGEKYGBge3J9CkNvdW50cnlEYXRhCmBgYAoKYGBge3J9CkNvdW50cnlEYXRhICU+JQogIG5yb3coKQpgYGAKYGBge3J9CkNvdW50cnlEYXRhICU+JQogIG5hbWVzKCkKYGBgCmBgYHtyfQpDb3VudHJ5RGF0YSAlPiUKICBoZWFkKCkKYGBgCgoKCgojIyMgRGF0YSBXcmFuZ2xpbmcgb2YgQ09WSUQgRGF0YXNldAoKYGBge3J9CkNPVklECmBgYAoKU2luY2Ugb3VyIGFuYWx5c2lzIGlzIGZvY3VzZWQgb24gdGhlIHNwcmVhZCBvZiBDT1ZJRC0xOSwgd2Ugc2VsZWN0IG9ubHkgY29sdW1ucyB3aGljaCBwZXJ0YWluIHRvIHRoZSBudW1iZXIgb2YgQ09WSUQtMTkgY2FzZXMgaW4gY291bnRyaWVzIG92ZXIgdGltZS4KCmBgYHtyfQpUaWR5Q09WSUQgPC0gQ09WSUQgJT4lCiAgcmVuYW1lKGNvdW50cnkgPSB0b3RhbC5jb3ZpZC5jYXNlcy5kZWF0aHMucGVyLm1pbGxpb24gKSAlPiUKICByZW5hbWUoIENvZGUgPSBYICkgJT4lCiAgcmVuYW1lKGRhdGUgPSBYLjEgKSAlPiUKICByZW5hbWUoY2FzZXNQZXJNaWxsaW9uID0gWC4zKSAlPiUKICBmaWx0ZXIocm93X251bWJlcigpID4gMSkgJT4lCiAgc3Vic2V0KHNlbGVjdCA9IGMoMSwzLDUpKSAlPiUKICBtdXRhdGUoIGNvdW50cnkgPSBhcy5jaGFyYWN0ZXIoY291bnRyeSkgKSAlPiUKICBtdXRhdGUoZGF0ZSA9IG1keShkYXRlKSkgJT4lCiAgbXV0YXRlKGNhc2VzUGVyTWlsbGlvbiA9IGFzLmludGVnZXIoY2FzZXNQZXJNaWxsaW9uKSAtIDEpCgoKYGBgCgoKYGBge3J9
ClRpZHlDT1ZJRAoKYGBgCgpXSEFUIERPRVMgQU4gSU5TVEFOQ0UgUkVQUkVTRU5UIE5PVz8/CgoKClNlbGVjdCBDb3VudHJ5RGF0YSByZWxldmFudCB0byBvdXIgYW5hbHlzaXM6IGFyZWEgKHNxIGttKSwgcG9wIChudW1iZXIgb2YgcGVvcGxlKQpjYWxjdWxhdGUgbmV3IHZhcmlhYmxlOiBwb3BkZW5zaXR5OiBudW1iZXIgb2YgcGVvcGxlIHBlciBzcSBrbQoKYGBge3J9CgpSZWxldmFudENvdW50cnlEYXRhIDwtCiAgQ291bnRyeURhdGEgJT4lCiAgc3Vic2V0KHNlbGVjdCA9IGMoMSwyLDMpKSAlPiUKICBtdXRhdGUocG9wZGVuc2l0eSA9IHJvdW5kKHBvcC9hcmVhLCBkaWdpdHMgPSAyKSkKClJlbGV2YW50Q291bnRyeURhdGEKYGBgCgoKCmBgYHtyfQoKQ09WSURHcm93dGggPC0gaW5uZXJfam9pbihUaWR5Q09WSUQsIFJlbGV2YW50Q291bnRyeURhdGEsIGJ5ID0gYygiY291bnRyeSIpKQoKQ09WSURHcm93dGgKCmBgYAoKCgpDYWxjdWxhdGUgdGhlIG51bWJlciBvZiBjYXNlcyBpbiBlYWNoIGNvdW50cnkgYnkgbXVsdGlwbHlpbmcgY2FzZXNQZXJNaWxsaW9uIGJ5IHBvcHVsYXRpb24gKGluIG1pbGxpb25zKS4gVGhpcyB2YXJpYWJsZSBpcyBub3cgYSBzdGFuZGFyZGl6ZWQgbWV0cmljIHdpdGggd2hpY2ggd2UgY2FuIGNvbXBhcmUgY291bnRyaWVzLgpgYGB7cn0KCkNPVklER3Jvd3RoIDwtCiAgQ09WSURHcm93dGggJT4lCiAgbXV0YXRlKCJjYXNlcyIgPSAoY2FzZXNQZXJNaWxsaW9uICogcm91bmQocG9wLzEwMDAwMDAsIGRpZ2l0cyA9IDApKSkgJT4lCiAgc3Vic2V0KHNlbGVjdCA9IGMoImNvdW50cnkiLCAiZGF0ZSIsICJjYXNlcyIsICJwb3AiLCAicG9wZGVuc2l0eSIpKQoKQ09WSURHcm93dGgKYGBgCgoKCgpUaGlzIHRhYmxlIHJlY29yZHMgdGhlIGZpcnN0IGRhdGUgdGhhdCBhIGNvdW50cnkgcmVjb3JkZWQgYSBub256ZXJvIG51bWJlciBvZiBDT1ZJRC0xOSBjYXNlcy4KYGBge3J9CgpGaXJzdEluc3RhbmNlIDwtCiAgQ09WSURHcm93dGggJT4lCiAgZmlsdGVyKGNhc2VzICE9IDApICU+JQogIGdyb3VwX2J5KGNvdW50cnkpICU+JQogIHN1bW1hcmlzZShiZWdpbm5pbmdvZnNwcmVhZCA9IG1pbihkYXRlKSkKICAKRmlyc3RJbnN0YW5jZQoKCmBgYAoKCgoKVGhpcyB0YWJsZSBhdmVyYWdlcyB0aGUgbnVtYmVyIG9mIGNhc2UgaW5jcmVhc2UgcGVyIGRheSBmcm9tIHRoZSBmaXJzdCBkYXkgYSBjb3VudHJ5IGhhZCBDT1ZJRC0xOSB0byB0aGUgbW9zdCByZWNlbnQgaW4gdGhlIGRhdGEgdGFibGUgKEFwcmlsIDUgMjAyMCkKCmBgYHtyfQoKRGFpbHlTcHJlYWQgPC0KICBsZWZ0X2pvaW4oQ09WSURHcm93dGgsIEZpcnN0SW5zdGFuY2UsIGJ5ID0gYygiY291bnRyeSIpKSAlPiUKICBmaWx0ZXIoZGF0ZSA9PSAiMjAyMC0wNC0wNSIpICU+JQogIG11dGF0ZShkYXlzZWxhcHNlZCA9IGRhdGUgLSBiZWdpbm5pbmdvZnNwcmVhZCkgJT4lCiAgbXV0YXRlKGRhaWx5c3ByZWFkID0gY2FzZXMgLyBhcy5udW1lcmljKGRheXNlbGFwc2VkKSAp
ICU+JQogIHN1YnNldChzZWxlY3QgPSBjKCJjb3VudHJ5IiwgImJlZ2lubmluZ29mc3ByZWFkIiwgImRhaWx5c3ByZWFkIikpCgpEYWlseVNwcmVhZCRkYWlseXNwcmVhZFtpcy5uYShEYWlseVNwcmVhZCRkYWlseXNwcmVhZCldIDwtIDAKCkRhaWx5U3ByZWFkCmBgYAoKCgpgYGB7cn0KCkNPVklERmluYWwgPC0KICBsZWZ0X2pvaW4oQ09WSURHcm93dGgsIERhaWx5U3ByZWFkLCBieSA9IGMoImNvdW50cnkiKSkKCmBgYAoKCgpgYGB7cn0KQ09WSURGaW5hbAoKYGBgCgoKYGBge3J9CgpDT1ZJREZpbmFsICU+JQogIGdyb3VwX2J5KGRhdGUpICU+JQogIHN1bW1hcmlzZSh0b3RhbGNhc2VzID0gc3VtKGNhc2VzKSkgJT4lCiAgZ2dwbG90KGFlcyh4ID0gZGF0ZSwgeSA9IHRvdGFsY2FzZXMpKSArIAogIGdlb21fcG9pbnQoKSAKCmBgYAoKCgoKCgoKYGBge3J9CiAgCkNPVklERmluYWwgJT4lCiAgZ3JvdXBfYnkoY291bnRyeSkgJT4lCiAgc3VtbWFyaXNlKGRhaWx5c3ByZWFkID0gbWVhbihkYWlseXNwcmVhZCkpICU+JQogIGFycmFuZ2UoZGVzYyhkYWlseXNwcmVhZCkpICU+JQogIGhlYWQoMjApICU+JQogIGdncGxvdChhZXMoeCA9IHJlb3JkZXIoY291bnRyeSwgZGVzYyhkYWlseXNwcmVhZCkpLCB5PSBkYWlseXNwcmVhZCkpICsKICBnZW9tX2JhcihzdGF0PSJpZGVudGl0eSIsIHBvc2l0aW9uID0gJ3N0YWNrJywgd2lkdGg9LjkpICsKICB0aGVtZShheGlzLnRleHQueD1lbGVtZW50X3RleHQoYW5nbGUgPSA2MCwgaGp1c3QgPSAxKSkgKwogIHNjYWxlX3lfY29udGludW91cyhsYWJlbHMgPSBmdW5jdGlvbih4KSBmb3JtYXQoeCwgc2NpZW50aWZpYyA9IEZBTFNFKSkgKwogIHhsYWIoIkNvdW50cnkiKSArCiAgeWxhYigiU3ByZWFkIG9mIENPVklELTE5IGNhc2VzIFBlciBEYXkiKQoKCgpgYGAKCgoKCgoKYGBge3J9CgpDT1ZJREZpbmFsICU+JQogIGdncGxvdChhZXMoeCA9IHBvcCwgeSA9IGRhaWx5c3ByZWFkKSkgKyAKICBnZW9tX3BvaW50KCkKCgpgYGAKCg==